##### Replication files for: A Note on Increases in Inattentive Online Survey-Takers Since 2020
##### Ternovski and Orr 2022

# install.packages("dplyr")
# install.packages("ggplot2")
# install.packages("ggpubr")
# install.packages("estimatr")
# install.packages("rms")

library(dplyr)
library(ggplot2)
library(ggpubr)
library(estimatr)
library(rms)

# Read the four public study files. `as.is = TRUE` keeps text columns as
# character vectors instead of converting them to factors.
study_k <- read.csv(file = "study_k_public.csv", as.is = TRUE)
study_t <- read.csv(file = "study_t_public.csv", as.is = TRUE)
study_s <- read.csv(file = "study_s_public.csv", as.is = TRUE)
study_o <- read.csv(file = "study_o_public.csv", as.is = TRUE)

##### Data: Sample size  #####

# Total consenting participants (row counts summed over the four files)
sum(nrow(study_k), nrow(study_t), nrow(study_s), nrow(study_o))

# Study K
# Date range (earliest and latest value of `date`)
with(study_k, c(min(date), max(date)))

# Sample size
nrow(study_k)

# Study T
# Date range (number of rows per distinct `date`)
table(study_t$date)

# Sample size
nrow(study_t)

# Study S
# Date range; only the first 10 characters of `starttime` are compared
with(study_s, {
  start_day <- substr(starttime, 1, 10)
  c(min(start_day), max(start_day))
})

# Sample size
nrow(study_s)

# Study O
# Dates (rows per recorded day, separately for each sub-study)
with(study_o, table(substr(RecordedDate[study == "Study 1"], 1, 10)))
with(study_o, table(substr(RecordedDate[study == "Study 2"], 1, 10)))

# Sample sizes (rows with Consent equal to 1 in each sub-study)
with(study_o, sum(Consent[study == "Study 1"] == 1))
with(study_o, sum(Consent[study == "Study 2"] == 1))


##### Figure 1: Passage Rates Over Time #####

# Turn date-like strings (first 10 characters "YYYY-MM-DD") into a "WW-YYYY"
# week label; %W is the week of the year with Monday as the first day.
week_label <- function(x) {
  format(as.Date(substr(x, 1, 10), format = "%Y-%m-%d"), "%W-%Y")
}

study_k$week <- week_label(study_k$date)
# Study T weeks are derived from the wave number in `study`, which is mapped
# to one date per wave.
study_t$week <- week_label(
  recode(study_t$study,
         "1" = "2020-04-12", "2" = "2020-05-06", "3" = "2020-05-17",
         "4" = "2020-07-22", "5" = "2020-09-15", "6" = "2020-10-31",
         "7" = "2020-11-08")
)
study_s$week <- week_label(study_s$starttime)
study_o$week <- week_label(study_o$RecordedDate)

# Weekly number of consenting respondents and share passing the attention
# check; each study encodes "passed" in a different column.
study_k_rates <- study_k %>%
  group_by(week) %>%
  summarise(n_consent = n(),
            pass_given_consent = mean(drop == "keep"))

# Week "14-2020" is left out of the Study S series.
study_s_rates <- study_s[study_s$week != "14-2020", ] %>%
  group_by(week) %>%
  summarise(n_consent = n(),
            pass_given_consent = 1 - mean(dropped))

study_t_rates <- study_t %>%
  group_by(week) %>%
  summarise(n_consent = n(),
            pass_given_consent = mean(pass_att == 1))

# Study O: only "Study 2" rows are used; passing requires both checks.
study_o_rates <- study_o[study_o$study == "Study 2", ] %>%
  group_by(week) %>%
  summarise(n_consent = n(),
            pass_given_consent = mean(AC_understand == 1 & AC_read == 1))

study_k_rates$survey <- "Study K"
study_s_rates$survey <- "Study S"
study_t_rates$survey <- "Study T"
study_o_rates$survey <- "Study O"

rates <- rbind(study_t_rates, study_k_rates, study_s_rates, study_o_rates)

rates$survey <- factor(rates$survey,
                       levels = c("Study K", "Study S", "Study T", "Study O"))

# Append weekday 1 so each week label can be parsed back into a date.
# NOTE(review): labels were written with %W but are parsed with %U here;
# confirm the two week conventions agree for the years in the data.
rates$week <- paste0(rates$week, "-1")
rates$week_date <- as.Date(rates$week, format = "%U-%Y-%u")
# Running week index: weeks labelled 2021 are shifted by 52.
rates$week_num <- as.numeric(substr(rates$week, 1, 2)) +
  52 * (substr(rates$week, 4, 7) == "2021")

# Plot window and rows shown in Figure 1. Study O is excluded: the survey
# labels are "Study K/S/T/O" (the previous filter on "Orr" matched nothing),
# and only three shapes are defined below.
fig1_start <- as.Date("2020-01-01")
fig1_end <- as.Date("2020-07-01")
fig1_rows <- rates$survey != "Study O" & rates$n_consent > 10 &
  rates$week_date > fig1_start & rates$week_date < fig1_end

png("Figure1_6mo_trend.png", width = 8, height = 7, units = "in", res = 200)
ggplot(rates[fig1_rows, ],
       aes(x = week_date, y = pass_given_consent,
           color = survey, shape = survey)) +
  # Grey band from 2020-03-19 to the right edge of the plot
  annotate("rect", ymin = -Inf, ymax = Inf,
           xmin = as.Date("2020-03-19"), xmax = fig1_end,
           alpha = 0.2, fill = "grey") +
  geom_point(aes(size = n_consent), stroke = 1.5) +
  # Linear trend per study, weighted by weekly sample size
  geom_smooth(method = "lm", aes(weight = n_consent, fill = survey),
              alpha = 0.15) +
  scale_x_date(breaks = "month", date_labels = "%b-%y",
               limits = c(fig1_start, fig1_end), expand = c(0, 0)) +
  scale_shape_manual(values = c(2, 17, 6)) +
  scale_size(breaks = c(30, 1000, 8000, 16000),
             guide = guide_legend(override.aes = list(shape = 17))) +
  ylim(-0.02, 1) +
  labs(x = "Month",
       y = "Attention check passage rate among consenting participants",
       size = "Sample Size", color = "Study", shape = "Study",
       fill = "Study") +
  theme_bw()
dev.off()


# Regression underlying Figure 1 and SM

# Rows eligible for the trend regressions: more than 10 consenting
# respondents in the week, and a week date strictly after 2020-01-01 and
# strictly before `window_end`.
trend_window <- function(window_end) {
  rates$n_consent > 10 &
    rates$week_date > as.Date("2020-01-01") &
    rates$week_date < as.Date(window_end)
}
in_6mo <- trend_window("2020-07-01")
in_12mo <- trend_window("2021-01-01")

# Per-study weekly trend in the pass rate, weighted by weekly sample size
study_k_decline_lm <- lm_robust(pass_given_consent ~ week_num,
                                weights = n_consent,
                                data = rates[rates$survey == "Study K" & in_6mo, ])

study_s_decline_lm <- lm_robust(pass_given_consent ~ week_num,
                                weights = n_consent,
                                data = rates[rates$survey == "Study S" & in_6mo, ])

study_t_decline_lm <- lm_robust(pass_given_consent ~ week_num,
                                weights = n_consent,
                                data = rates[rates$survey == "Study T" & in_6mo, ])

# For each fit: coefficient table, number of weekly observations, and the
# sum of the weights
for (fit in list(study_k_decline_lm, study_s_decline_lm, study_t_decline_lm)) {
  print(fit)
  print(fit$nobs)
  print(sum(fit$weights))
}


# Pooled trend over the 6-month window with study indicators; Study O excluded
decline6mo_lm <- lm_robust(pass_given_consent ~ week_num + survey,
                           weights = n_consent,
                           data = rates[rates$survey != "Study O" & in_6mo, ])
decline6mo_lm
decline6mo_lm$nobs
sum(decline6mo_lm$weights)


# Same pooled model over the 12-month window
decline12mo_lm <- lm_robust(pass_given_consent ~ week_num + survey,
                            weights = n_consent,
                            data = rates[rates$survey != "Study O" & in_12mo, ])
decline12mo_lm
decline12mo_lm$nobs
sum(decline12mo_lm$weights)


##### Figure 2: A Survey Without Attention Screening had Lower Response Stability and Weaker Treatment Effects #####

# Mean absolute error, study #, outcome (exp = A, race = B, violent = C)

# Mean absolute difference between the `<outcome>_post` and `<outcome>_pre`
# columns of `dat`, restricted to rows of one sub-study (`study_label`) and
# one value of Z_druginfo (`arm`). Missing differences are ignored.
mean_abs_change <- function(dat, study_label, arm, outcome) {
  rows <- dat$study == study_label & dat$Z_druginfo == arm
  post <- dat[[paste0(outcome, "_post")]][rows]
  pre <- dat[[paste0(outcome, "_pre")]][rows]
  mean(abs(post - pre), na.rm = TRUE)
}

# Point estimates on the full data
mae1_exp_est <- mean_abs_change(study_o, "Study 1", "none", "exp")
mae2_exp_est <- mean_abs_change(study_o, "Study 2", "none", "exp")

mae1_race_est <- mean_abs_change(study_o, "Study 1", "none", "race")
mae2_race_est <- mean_abs_change(study_o, "Study 2", "none", "race")

mae1_violent_est <- mean_abs_change(study_o, "Study 1", "none", "violent")
mae2_violent_est <- mean_abs_change(study_o, "Study 2", "none", "violent")

mae1_exp_est_treat <- mean_abs_change(study_o, "Study 1", "exp", "exp")
mae2_exp_est_treat <- mean_abs_change(study_o, "Study 2", "exp", "exp")

reps <- 10000

# One slot per bootstrap replicate
mae1_exp <- rep(NA_real_, reps)
mae2_exp <- rep(NA_real_, reps)
mae1_race <- rep(NA_real_, reps)
mae2_race <- rep(NA_real_, reps)
mae1_violent <- rep(NA_real_, reps)
mae2_violent <- rep(NA_real_, reps)
mae1_exp_treat <- rep(NA_real_, reps)
mae2_exp_treat <- rep(NA_real_, reps)

set.seed(812020)

# Bootstrap: resample all rows of study_o with replacement (one draw per
# replicate) and recompute every statistic on the same resample.
for (i in seq_len(reps)) {
  dat_temp <- study_o[sample(seq_len(nrow(study_o)), replace = TRUE), ]

  mae1_exp[i] <- mean_abs_change(dat_temp, "Study 1", "none", "exp")
  mae2_exp[i] <- mean_abs_change(dat_temp, "Study 2", "none", "exp")

  mae1_race[i] <- mean_abs_change(dat_temp, "Study 1", "none", "race")
  mae2_race[i] <- mean_abs_change(dat_temp, "Study 2", "none", "race")

  mae1_violent[i] <- mean_abs_change(dat_temp, "Study 1", "none", "violent")
  mae2_violent[i] <- mean_abs_change(dat_temp, "Study 2", "none", "violent")

  mae1_exp_treat[i] <- mean_abs_change(dat_temp, "Study 1", "exp", "exp")
  mae2_exp_treat[i] <- mean_abs_change(dat_temp, "Study 2", "exp", "exp")
}

# Control-group estimates with bootstrap standard errors. The column is named
# MSE but holds the mean absolute differences computed above.
stability <- data.frame(MSE = c(mae1_exp_est, mae2_exp_est,
                                mae1_race_est, mae2_race_est,
                                mae1_violent_est, mae2_violent_est),
                        se = c(sd(mae1_exp), sd(mae2_exp),
                               sd(mae1_race), sd(mae2_race),
                               sd(mae1_violent), sd(mae2_violent)),
                        question = c("A", "A", "B", "B", "C", "C"),
                        group = "Control",
                        study = rep(c("Study 1 (no screen)", "Study 2 (screen)"), 3))

# Mean absolute error, study #, outcome, treatment group, pre/post

# Mean absolute distance between the `exp_<wave>` column (wave = "pre" or
# "post") and `truth`, for rows of one sub-study and one value of Z_druginfo.
# Missing responses are ignored. 47 is the benchmark the script compares
# responses to (labelled "truth" in the figure).
mean_abs_error <- function(dat, study_label, arm, wave, truth = 47) {
  rows <- dat$study == study_label & dat$Z_druginfo == arm
  mean(abs(dat[[paste0("exp_", wave)]][rows] - truth), na.rm = TRUE)
}

# Point estimates on the full data (suffix 1 = first response, 2 = second)
mae1_exp_control1_est <- mean_abs_error(study_o, "Study 1", "none", "pre")
mae2_exp_control1_est <- mean_abs_error(study_o, "Study 2", "none", "pre")

mae1_exp_control2_est <- mean_abs_error(study_o, "Study 1", "none", "post")
mae2_exp_control2_est <- mean_abs_error(study_o, "Study 2", "none", "post")

mae1_exp_treat1_est <- mean_abs_error(study_o, "Study 1", "exp", "pre")
mae2_exp_treat1_est <- mean_abs_error(study_o, "Study 2", "exp", "pre")

mae1_exp_treat2_est <- mean_abs_error(study_o, "Study 1", "exp", "post")
mae2_exp_treat2_est <- mean_abs_error(study_o, "Study 2", "exp", "post")

reps <- 10000

# One slot per bootstrap replicate
mae1_exp_control1 <- rep(NA_real_, reps)
mae2_exp_control1 <- rep(NA_real_, reps)
mae1_exp_control2 <- rep(NA_real_, reps)
mae2_exp_control2 <- rep(NA_real_, reps)

mae1_exp_treat1 <- rep(NA_real_, reps)
mae2_exp_treat1 <- rep(NA_real_, reps)
mae1_exp_treat2 <- rep(NA_real_, reps)
mae2_exp_treat2 <- rep(NA_real_, reps)

set.seed(812020)

# Bootstrap: one resample of all study_o rows per replicate; every statistic
# is recomputed on that same resample.
for (i in seq_len(reps)) {
  dat_temp <- study_o[sample(seq_len(nrow(study_o)), replace = TRUE), ]

  mae1_exp_control1[i] <- mean_abs_error(dat_temp, "Study 1", "none", "pre")
  mae2_exp_control1[i] <- mean_abs_error(dat_temp, "Study 2", "none", "pre")

  mae1_exp_control2[i] <- mean_abs_error(dat_temp, "Study 1", "none", "post")
  mae2_exp_control2[i] <- mean_abs_error(dat_temp, "Study 2", "none", "post")

  mae1_exp_treat1[i] <- mean_abs_error(dat_temp, "Study 1", "exp", "pre")
  mae2_exp_treat1[i] <- mean_abs_error(dat_temp, "Study 2", "exp", "pre")

  mae1_exp_treat2[i] <- mean_abs_error(dat_temp, "Study 1", "exp", "post")
  mae2_exp_treat2[i] <- mean_abs_error(dat_temp, "Study 2", "exp", "post")
}

# Estimate = control minus treatment error on the second response. The SE is
# the bootstrap SD of (treatment - control); the SD is unaffected by the sign.
# The column is named MSE but holds differences in mean absolute error.
manipulation <- data.frame(MSE = c(mae1_exp_control2_est - mae1_exp_treat2_est,
                                   mae2_exp_control2_est - mae2_exp_treat2_est),
                           se = c(sd(mae1_exp_treat2 - mae1_exp_control2),
                                  sd(mae2_exp_treat2 - mae2_exp_control2)),
                           question = "A",
                           group = "Treatment vs Control",
                           study = c("Study 1 (no screen)", "Study 2 (screen)"))

# Treatment effects
manipulation

# % smaller
1 - (mae1_exp_treat2_est - mae1_exp_control2_est) / (mae2_exp_treat2_est - mae2_exp_control2_est)

# Absolute difference
(mae2_exp_treat2_est - mae2_exp_control2_est) - (mae1_exp_treat2_est - mae1_exp_control2_est)
sd((mae2_exp_treat2 - mae2_exp_control2) - (mae1_exp_treat2 - mae1_exp_control2))
mean((mae2_exp_treat2 - mae2_exp_control2) > (mae1_exp_treat2 - mae1_exp_control2))


# Figure
# Stack the six control-group rows on top of the two manipulation rows and
# give each set its facet title.
fig2 <- rbind(stability, manipulation)
fig2$label <- rep(
  c("Control group mean absolute difference\nbetween 1st and 2nd response",
    "Manipulation ATE: reduction in mean \nabsolute difference between truth and 2nd response"),
  times = c(6, 2)
)

# Points and error bars for the two studies are offset by the same dodge
study_dodge <- position_dodge(width = 0.3)

png("stability_manip.png", width = 8, height = 7, units = "in", res = 200)
ggplot(fig2, aes(x = question, y = MSE, color = study)) +
  geom_point(position = study_dodge) +
  # Error bars span the estimate +/- 1.96 bootstrap standard errors
  geom_errorbar(aes(ymin = MSE - 1.96 * se, ymax = MSE + 1.96 * se),
                position = study_dodge, width = 0.1) +
  facet_wrap(vars(label)) +
  scale_color_manual(values = c("grey65", "grey22")) +
  ylim(0, 13) +
  labs(x = "Question", y = "Mean absolute difference", color = "Study") +
  theme_bw() +
  theme(strip.background = element_blank())
dev.off()

# Figure note
# Share of bootstrap replicates in which Study 1's control-group difference
# is smaller than Study 2's, per question
mean(mae1_exp < mae2_exp)
mean(mae1_race < mae2_race)
mean(mae1_violent < mae2_violent)

# Share of replicates in which the Study 2 treatment-control gap exceeds
# the Study 1 gap
mean((mae2_exp_treat2 - mae2_exp_control2) > (mae1_exp_treat2 - mae1_exp_control2))


###### Figure 3: A Survey Without Attention Screening Required Larger Sample Sizes to Detect a Treatment Manipulation Effect #####

# Rows of each sub-study with a non-missing intro_time
study_o_screen <- study_o[study_o$study == "Study 2" & !is.na(study_o$intro_time), ]
study_o_noscreen <- study_o[study_o$study == "Study 1" & !is.na(study_o$intro_time), ]

# Draw `n` rows of `dat` with replacement, regress |exp_post - 47| on
# Z_druginfo, and return the p-value of the second coefficient.
manipulation_p_value <- function(dat, n) {
  boot <- dat[sample(seq_len(nrow(dat)), n, replace = TRUE), ]
  fit <- lm(I(abs(exp_post - 47)) ~ Z_druginfo, data = boot)
  summary(fit)$coefficients[2, 4]
}

reps <- 5000
Ns <- seq(50, 500, by = 10)

# p-values: one row per replicate, one column per sample size
mae1_exp_p <- matrix(NA_real_, nrow = reps, ncol = length(Ns))
mae2_exp_p <- matrix(NA_real_, nrow = reps, ncol = length(Ns))


set.seed(812020)

# Within each replicate the Study 1 sample is drawn before the Study 2 sample.
for (j in seq_along(Ns)) {
  for (i in seq_len(reps)) {
    mae1_exp_p[i, j] <- manipulation_p_value(study_o_noscreen, Ns[j])
    mae2_exp_p[i, j] <- manipulation_p_value(study_o_screen, Ns[j])
  }
}


# Power = share of replicates with p < 0.05 at each sample size
power_sim <- data.frame(Sample = rep(Ns, 2),
                        Power = c(colMeans(mae1_exp_p < 0.05), colMeans(mae2_exp_p < 0.05)),
                        Study = rep(c("Study 1 (no screen)", "Study 2 (screen)"), each = length(Ns)))

power_sim

png("power_observed_effects.png", height = 5, width = 6.5, units = "in", res = 200)
ggplot(power_sim, aes(x = Sample, y = Power, color = Study)) +
  geom_point() +
  # Reference line at 0.8 power
  geom_hline(yintercept = 0.8, linetype = "dashed") +
  scale_color_manual(values = c("grey65", "grey22")) +
  labs(x = "Sample Size") +
  theme_bw()
dev.off()


# Exact size needed for 0.8 power

# Same simulation on a finer grid of sample sizes (60 to 70 in steps of 1)
reps_80 <- 5000
Ns_80 <- seq(60, 70, by = 1)

mae1_exp_p_80 <- matrix(NA_real_, nrow = reps_80, ncol = length(Ns_80))
mae2_exp_p_80 <- matrix(NA_real_, nrow = reps_80, ncol = length(Ns_80))


set.seed(812020)

for (j in seq_along(Ns_80)) {
  for (i in seq_len(reps_80)) {
    mae1_exp_p_80[i, j] <- manipulation_p_value(study_o_noscreen, Ns_80[j])
    mae2_exp_p_80[i, j] <- manipulation_p_value(study_o_screen, Ns_80[j])
  }
}

power_sim_80 <- data.frame(Sample = rep(Ns_80, 2),
                           Power = c(colMeans(mae1_exp_p_80 < 0.05), colMeans(mae2_exp_p_80 < 0.05)),
                           Study = rep(c("Study 1 (no screen)", "Study 2 (screen)"), each = length(Ns_80)))

power_sim_80


##### Demographics of Attentive and Inattentive Respondents #####

# Study T, consenting respondents only: percentages by whether the attention
# check was passed
study_t[study_t$consent == 1, ] %>%
  group_by(pass_att == 1) %>%
  dplyr::summarise(Romney = 100 * mean(romney_party_guess, na.rm = TRUE),
                   Age_match = 100 * mean(agematch_dob, na.rm = TRUE))

# Study S: percentage with matching zip/state, by dropped status
study_s %>%
  group_by(dropped) %>%
  dplyr::summarise(State_match = 100 * mean(zip_state_match, na.rm = TRUE))

# Chi-squared tests of association with attention status.
# NOTE(review): these use all study_t rows, while the summary above is limited
# to consent == 1 -- confirm that is intended.
chisq.test(study_t$pass_att, study_t$romney_party_guess)
chisq.test(study_s$dropped, study_s$zip_state_match)
chisq.test(study_t$pass_att, study_t$agematch_dob)


##### Table 1: Inattentive Respondents Differ Across Demographics #####

# Study K, consenting respondents: mean age and percentage in each category,
# split by whether the respondent was kept (drop == "keep")
study_k[study_k$consent == "Yes", ] %>%
  group_by(drop == "keep") %>%
  dplyr::summarise(Age = mean(age, na.rm = TRUE),
                   Female = 100 * mean(female, na.rm = TRUE),
                   HHIncome_high = 100 * mean(hhi_greater_110k, na.rm = TRUE),
                   HHIncome_low = 100 * mean(hhi_lessthank_15k, na.rm = TRUE),
                   College = 100 * mean(college_educ, na.rm = TRUE),
                   Democrat = 100 * mean(democrat, na.rm = TRUE))

# Study T, consenting respondents: same summary, split by attention-check pass
study_t[study_t$consent == 1, ] %>%
  group_by(pass_att == 1) %>%
  dplyr::summarise(Age = mean(age, na.rm = TRUE),
                   Female = 100 * mean(female, na.rm = TRUE),
                   HHIncome_high = 100 * mean(hhi_100k, na.rm = TRUE),
                   HHIncome_low = 100 * mean(hhi_min, na.rm = TRUE),
                   College = 100 * mean(college, na.rm = TRUE),
                   Democrat = 100 * mean(dem, na.rm = TRUE))

# Logistic regressions of passing on the same demographics (consenting
# respondents only)
lrm(drop == "keep" ~ age + female + hhi_greater_110k + hhi_lessthank_15k +
      college_educ + democrat, data = study_k[study_k$consent == "Yes",])

lrm(pass_att ~ age + female + hhi_100k + hhi_min + college + dem,
    data = study_t[study_t$consent == 1,])


##### Figure 4: Mixed Evidence About Trends in Data Quality Among Attentive Respondents #####

# Study K: among consenting, kept respondents, weekly correlation between
# Trump approval and favorability, using only weeks in which trumpapprove
# was recorded at least once.
study_k_trump_weeks <- unique(study_k$week[!is.na(study_k$trumpapprove)])
study_k_pass_rows <- study_k$consent == "Yes" & study_k$drop == "keep" &
  study_k$week %in% study_k_trump_weeks
study_k_rates_pass <- study_k[study_k_pass_rows, ] %>%
  group_by(week) %>%
  summarise(n = n(),
            match = cor(trumpapprove, trumpfav, use = "complete.obs"))

# Study S: among respondents who were not dropped, weekly zip/state match
# rate. Week labels are "WW-YYYY", so the excluded week must be written
# "14-2020" (as in the Figure 1 rates); the earlier "2020-14" never matched.
study_s_rates_pass <- study_s[study_s$dropped == 0 & study_s$week != "14-2020", ] %>%
  group_by(week) %>%
  summarise(n = n(),
            match = mean(zip_state_match, na.rm = TRUE))

study_k_rates_pass$survey <- "Kalla"
study_s_rates_pass$survey <- "Schaffner"

rates_pass <- rbind(study_k_rates_pass, study_s_rates_pass)

rates_pass$survey <- factor(rates_pass$survey, levels = c("Kalla", "Schaffner"))

# Append weekday 1 so each week label can be parsed into a date
rates_pass$week <- paste0(rates_pass$week, "-1")
rates_pass$week_date <- as.Date(rates_pass$week, format = "%U-%Y-%u")

rates_pass$label <- ifelse(rates_pass$survey == "Kalla", "Correlation between Trump favorability and approval (Study K)", "State match rate (Study S)")
rates_pass$label2 <- ifelse(rates_pass$survey == "Kalla", "Study K", "Study S")

# Build one panel: weekly quality measure for `survey_name` (weeks with
# n > 10), with a sample-size-weighted linear trend. Both panels share the
# same date range, size scale and y range so they can share one legend.
quality_trend_plot <- function(survey_name, y_label) {
  panel <- rates_pass[rates_pass$n > 10 & rates_pass$survey == survey_name, ]
  ggplot(panel, aes(x = week_date, y = match)) +
    geom_point(aes(size = n), stroke = 1.5, shape = 17) +
    geom_smooth(method = "lm", aes(weight = n), alpha = 0.15) +
    facet_wrap(vars(label2)) +
    scale_x_date(breaks = "month", date_labels = "%b-%y",
                 limits = c(min(rates_pass$week_date), max(rates_pass$week_date))) +
    scale_size(breaks = c(100, 1000, 10000), limits = c(41, 13633),
               guide = guide_legend(override.aes = list(shape = 17))) +
    ylim(0.85, 1) +
    labs(x = "Month", y = y_label,
         size = "Sample Size", color = "Study", shape = "Study", fill = "Study") +
    theme_bw() +
    theme(strip.background = element_blank())
}

k_qualtrend <- quality_trend_plot("Kalla", "Correlation between Trump favorability and approval")
s_qualtrend <- quality_trend_plot("Schaffner", "State match rate")

png("pass_quality_week_panel.png", width = 9.5, height = 5.5, units = "in", res = 200)
ggarrange(k_qualtrend, s_qualtrend, common.legend = TRUE, legend = "right")
dev.off()


# Weighted linear trends in the weekly quality measures.
# NOTE(review): the regressor is the week-of-year number only (no offset for
# weeks labelled 2021) and weeks with n <= 10 are not dropped, unlike the
# plots above -- confirm this is intended.
study_k_rates_lm <- lm_robust(match ~ as.numeric(substr(week, 1, 2)), data = study_k_rates_pass, weights = n)
study_s_rates_lm <- lm_robust(match ~ as.numeric(substr(week, 1, 2)), data = study_s_rates_pass, weights = n)

study_k_rates_lm
study_s_rates_lm


##### Has Attentiveness Bounced Back in 2021? #####

# Study T
# Consenting respondents, split into the first three and the last four waves
t_early_waves <- study_t$consent == 1 &
  study_t$dateofstudy %in% c("4/12/2020", "5/17/2020", "5/6/2020")
t_late_waves <- study_t$consent == 1 &
  study_t$dateofstudy %in% c("7/22/2020", "9/15/2020", "10/31/2020", "11/8/2020")

# Pass rate and number of respondents in each group of waves
mean(study_t$pass_att[t_early_waves])
length(study_t$pass_att[t_early_waves])

mean(study_t$pass_att[t_late_waves])
length(study_t$pass_att[t_late_waves])

# Study K
# Consenting respondents in three periods: before 2020-04-01, strictly
# between 2020-04-01 and 2020-07-01, and dates containing "2021".
# NOTE(review): both comparisons against 2020-04-01 are strict, so responses
# dated exactly 2020-04-01 fall in neither of the first two periods.
k_consented <- study_k$consent == "Yes"
k_day <- as.Date(study_k$date)
k_before_april <- k_consented & k_day < as.Date("2020-04-01")
k_april_june <- k_consented & k_day > as.Date("2020-04-01") &
  k_day < as.Date("2020-07-01")
k_in_2021 <- k_consented & grepl("2021", study_k$date)

mean(study_k$drop[k_before_april] == "keep")
length(study_k$drop[k_before_april])

mean(study_k$drop[k_april_june] == "keep")
length(study_k$drop[k_april_june])

mean(study_k$drop[k_in_2021] == "keep")
length(study_k$drop[k_in_2021])

# Difference in pass rates: April-June 2020 minus 2021
mean(study_k$drop[k_april_june] == "keep") -
  mean(study_k$drop[k_in_2021] == "keep")
# NOTE(review): this test compares 2021 against every consenting response
# after 2020-04-01 (no 2020-07-01 cutoff), not only April-June 2020.
t.test(study_k$drop[study_k$consent == "Yes" & as.Date(study_k$date) > as.Date("2020-04-01")] == "keep" ~
         grepl("2021", study_k$date)[study_k$consent == "Yes" & as.Date(study_k$date) > as.Date("2020-04-01")])

